Cases

The analysis of the case-only data

Brian Muchmore https://github.com/bmuchmore (GENYO) http://www.genyo.es/
December 03, 2018

NOTE

Given the same data, the following commands should yield results almost identical to those shown here, with any discrepancies being stochastic in nature. Some of these commands, however, can take a long time to run, so while we show the commands here as we originally ran them, results are often read back from file. If you are trying to reproduce these results using the exact data and code used here and run into problems or incongruous results, please submit an issue, and we will address it as soon as possible.

Package Information

These are the packages I will be using.


library(PreciseDist)
library(future)
library(doFuture)
library(readr)
library(heatmaply)

This is the session info.


# Print the R version, platform and package versions used for this analysis.
utils::sessionInfo()

R version 3.4.4 (2018-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.1 LTS

Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets 
[7] methods   base     

other attached packages:
 [1] heatmaply_0.15.2       viridis_0.5.1         
 [3] viridisLite_0.3.0      plotly_4.7.1          
 [5] ggplot2_3.0.0          readr_1.1.1           
 [7] doFuture_0.6.0         iterators_1.0.10      
 [9] foreach_1.4.4          future_1.9.0          
[11] PreciseDist_0.0.0.9000

loaded via a namespace (and not attached):
  [1] R.utils_2.6.0           tidyselect_0.2.4       
  [3] htmlwidgets_1.2         TSP_1.1-6              
  [5] trimcluster_0.1-2       grid_3.4.4             
  [7] ranger_0.10.1           Rtsne_0.13             
  [9] munsell_0.5.0           codetools_0.2-15       
 [11] SNFtool_2.3.0           miniUI_0.1.1.1         
 [13] misc3d_0.8-4            withr_2.1.2            
 [15] colorspace_1.3-2        longitudinalData_2.4.1 
 [17] Boruta_6.0.0            knitr_1.20             
 [19] geometry_0.3-6          stats4_3.4.4           
 [21] robustbase_0.93-1       dtw_1.20-1             
 [23] dimRed_0.1.0            DistatisR_1.0          
 [25] listenv_0.7.0           radix_0.5.0.9001       
 [27] DistributionUtils_0.5-1 rprojroot_1.3-2        
 [29] locpol_0.7-0            ipred_0.9-6            
 [31] randomForest_4.6-14     gclus_1.3.1            
 [33] diptest_0.75-7          R6_2.2.2               
 [35] seriation_1.2-3         fields_9.6             
 [37] rpivotTable_0.3.0       flexmix_2.3-14         
 [39] manipulateWidget_0.10.0 DRR_0.0.3              
 [41] bitops_1.0-6            assertthat_0.2.0       
 [43] promises_1.0.1          networkD3_0.4          
 [45] SDMTools_1.1-221        scales_1.0.0           
 [47] nnet_7.3-12             mmtsne_0.1.0           
 [49] gtable_0.2.0            ddalpha_1.3.4          
 [51] globals_0.12.1          spam_2.2-0             
 [53] timeDate_3043.102       rlang_0.2.2            
 [55] CVST_0.2-2              RcppRoll_0.3.0         
 [57] profileModel_0.5-9      splines_3.4.4          
 [59] lazyeval_0.2.1          ModelMetrics_1.1.0     
 [61] princurve_2.1.0         trelliscopejs_0.1.14   
 [63] broom_0.5.0             checkmate_1.8.5        
 [65] heatmap.plus_1.3        rgl_0.99.16            
 [67] yaml_2.2.0              reshape2_1.4.3         
 [69] abind_1.4-5             threejs_0.3.1          
 [71] crosstalk_1.0.0         backports_1.1.3        
 [73] httpuv_1.4.4.2          caret_6.0-80           
 [75] tools_3.4.4             lava_1.6.2             
 [77] infer_0.3.1             gplots_3.0.1           
 [79] RColorBrewer_1.1-2      proxy_0.4-22           
 [81] BiocGenerics_0.24.0     analogue_0.17-0        
 [83] Rcpp_0.12.18            splus2R_1.2-2          
 [85] plyr_1.8.4              visNetwork_2.0.4       
 [87] base64enc_0.1-3         progress_1.2.0         
 [89] purrr_0.2.5             prettyunits_1.0.2      
 [91] rpart_4.1-13            diffusr_0.1.4          
 [93] zoo_1.8-3               sfsmisc_1.1-2          
 [95] cluster_2.0.7-1         magrittr_1.5           
 [97] data.table_1.11.4       TSclust_1.2.4          
 [99] mvtnorm_1.0-8           whisker_0.3-2          
[101] matrixStats_0.53.1      hms_0.4.2              
[103] NetPreProc_1.1          mime_0.6               
[105] evaluate_0.11           xtable_1.8-3           
[107] mclust_5.4.1            gridExtra_2.3          
[109] compiler_3.4.4          tibble_1.4.2           
[111] maps_3.3.0              mgc_1.0.1              
[113] KernSmooth_2.23-15      crayon_1.3.4           
[115] R.oo_1.22.0             htmltools_0.3.6        
[117] mgcv_1.8-23             later_0.7.3            
[119] tidyr_0.8.1             RcppParallel_4.4.1     
[121] lubridate_1.7.4         magic_1.5-8            
[123] fpc_2.1-11              autocogs_0.0.1         
[125] MASS_7.3-49             Matrix_1.2-14          
[127] permute_0.9-4           gdata_2.18.0           
[129] wmtsa_2.0-3             R.methodsS3_1.7.1      
[131] dotCall64_1.0-0         bindr_0.1.1            
[133] gower_0.1.2             igraph_1.2.1           
[135] ifultools_2.0-4         pkgconfig_2.0.1        
[137] registry_0.5            brglm_0.6.1            
[139] ExPosition_2.8.19       philentropy_0.2.0      
[141] microbenchmark_1.4-4    recipes_0.1.3          
[143] clv_0.3-2.1             webshot_0.5.0          
[145] prodlim_2018.04.18      LPStimeSeries_1.0-5    
[147] stringr_1.3.1           digest_0.6.18          
[149] pls_2.6-0               vegan_2.5-2            
[151] graph_1.56.0            rmarkdown_1.10         
[153] dendextend_1.8.0        uwot_0.0.0.9004        
[155] kernlab_0.9-26          gtools_3.8.1           
[157] modeltools_0.2-22       shiny_1.1.0            
[159] nlme_3.1-131            glasso_1.10            
[161] jsonlite_1.5            bindrcpp_0.2.2         
[163] alluvial_0.1-2          TSdist_3.4             
[165] pillar_1.3.0            lattice_0.20-35        
[167] httr_1.3.1              DEoptimR_1.0-8         
[169] survival_2.41-3         glue_1.3.0             
[171] xts_0.11-0              prabclus_2.2-6         
[173] class_7.3-14            stringi_1.2.3          
[175] pdc_1.0.3               KODAMA_1.5             
[177] rsample_0.0.2           caTools_1.17.1         
[179] dplyr_0.7.6             hglasso_1.2            

# Parallel backend: route foreach through future and use 16 forked workers.
# The globals size cap is lifted so large objects can be shipped to workers.
library(future)
library(doFuture)
options(future.globals.maxSize = Inf)
registerDoFuture()
plan(multicore, workers = 16)

# Run precise_dist() with dists = "all_dists" on the case-only data (as a
# matrix). The call is given an .rds path via `file`; the same path is read
# back below.
flow_case_dists <- precise_dist(
  as.matrix(flow_case_data),
  dists = "all_dists",
  suffix = "",
  parallel = TRUE,
  local_timeout = Inf,
  verbose = TRUE,
  file = "/home/brian/Desktop/flow/flow_case_dists.rds"
)

# Reload the stored result so later steps work from the file on disk.
flow_case_dists <- read_rds("/home/brian/Desktop/flow/flow_case_dists.rds")

# Pass the stored results through precise_transform() with enforce_dist = TRUE
# (presumably so every entry is expressed as a distance -- confirm in the
# PreciseDist documentation).
flow_case_distances <- precise_transform(flow_case_dists, enforce_dist = TRUE)

# Parallel backend for the UMAP step: 4 forked workers, no globals size cap.
library(future)
library(doFuture)
options(future.globals.maxSize = Inf)
registerDoFuture()
plan(multicore, workers = 4)

# Run precise_umap() on the transformed distances, requesting plotly output.
# No colouring vector or palette is supplied.
flow_case_umap <- precise_umap(
  data = flow_case_distances,
  distance = TRUE,
  # Embedding parameters
  n_neighbors = 5,
  min_dist = 0.1,
  spread = 10,
  bandwidth = 1,
  # Output / appearance
  type = "plotly",
  color_vec = NULL,
  colors = NULL,
  # Execution
  parallel = TRUE,
  verbose = TRUE
)

# Pass the UMAP results to precise_trellis(), targeting a folder under the
# current working directory. file.path() builds the same path as the previous
# paste0(getwd(), "/...") without hand-written separators.
precise_trellis(
  flow_case_umap,
  path = file.path(getwd(), "trellis_flow_case_umap"),
  self_contained = TRUE
)

# Run precise_correlations() on the transformed distances (Pearson method,
# 50 permutations), serially.
flow_case_distance_correlations <- precise_correlations(
  data = flow_case_distances,
  method = "pearson",
  permutations = 50,
  verbose = TRUE,
  parallel = FALSE
)

# Interactive heatmap of the `statistic` element of the result.
heatmaply(flow_case_distance_correlations$statistic)

# Run precise_dist() again for dists = "soergel" only, with partitions = 10
# and no parallelism. The call is given an .rds path via `file`.
flow_case_soergel_dists <- precise_dist(
  as.matrix(flow_case_data),
  dists = "soergel",
  partitions = 10,
  suffix = "",
  parallel = FALSE,
  local_timeout = Inf,
  verbose = TRUE,
  file = "/home/brian/Desktop/flow/flow_case_soergel_dists.rds"
)

# Parallel backend for the transform/fusion chain: 10 forked workers.
library(future)
library(doFuture)
options(future.globals.maxSize = Inf)
registerDoFuture()
plan(multicore, workers = 10)

# Apply the transforms one at a time, in the same order as the original
# pipeline, then fuse. Each step feeds the previous step's output.
flow_case_soergel_fusion <- precise_transform(
  flow_case_soergel_dists,
  return_list = TRUE
)
flow_case_soergel_fusion <- precise_transform(
  flow_case_soergel_fusion,
  enforce_sim = TRUE
)
flow_case_soergel_fusion <- precise_transform(
  flow_case_soergel_fusion,
  fixed_k = 100
)
flow_case_soergel_fusion <- precise_transform(
  flow_case_soergel_fusion,
  transform = "laplacian",
  parallel = TRUE
)
flow_case_soergel_fusion <- precise_transform(
  flow_case_soergel_fusion,
  enforce_dist = TRUE
)
flow_case_soergel_fusion <- precise_fusion(
  flow_case_soergel_fusion,
  fusion = "fuse"
)

# Parallel backend for graph construction: 8 forked workers.
library(future)
library(doFuture)
options(future.globals.maxSize = Inf)
registerDoFuture()
plan(multicore, workers = 8)

# Run precise_graph() on the fused Soergel result.
# NOTE(review): distance = FALSE is passed although the last transform before
# fusion used enforce_dist = TRUE -- confirm what precise_fusion() returns.
flow_case_soergel_graph <- precise_graph(
  data = flow_case_soergel_fusion,
  method = 5,
  distance = FALSE,
  # Neighbourhood / layout parameters
  n_neighbors = 75,
  min_dist = 0.0,
  spread = 1,
  bandwidth = 1,
  # Execution
  parallel = TRUE,
  verbose = TRUE
)

# Run precise_viz() on the Soergel graph with plot_type = "drl_2d_plot".
# No colouring is applied, and the graphml/html arguments are left NULL.
flow_case_soergel_2d_plot <- precise_viz(
  data = flow_case_soergel_graph,
  plot_type = "drl_2d_plot",
  k = 50,
  jitter = 2.5,
  size = 0.5,
  color_vec = NULL,
  colors = NULL,
  graphml = NULL,
  html = NULL,
  verbose = TRUE
)

# Display the plot.
flow_case_soergel_2d_plot$visual_output

# Partition the 2D layout into 10 clusters with PAM and store the labels as a
# one-column tibble (`pam_clusters`) with values "cluster_1" ... "cluster_10".
#
# Fixes relative to the previous version:
#   * The layout is read from `flow_case_soergel_2d_plot`, the object created
#     by precise_viz() above and passed to precise_descriptors() below. The
#     previously referenced `flow_case_soergel_viz` is never defined here.
#     NOTE(review): assumes the precise_viz() result has a `plot_layout`
#     element, as the original code did -- confirm.
#   * Labels are built with vectorised paste0() and tibble::tibble(), so the
#     chunk no longer relies on purrr/tibble/dplyr being attached.
library(cluster)
flow_case_soergel_clusters <- tibble::tibble(
  pam_clusters = paste0(
    "cluster_",
    cluster::pam(
      x = flow_case_soergel_2d_plot$plot_layout,
      k = 10,
      diss = FALSE,
      metric = NULL,
      medoids = NULL,
      stand = FALSE,
      # Return only the cluster assignment vector.
      cluster.only = TRUE,
      do.swap = TRUE,
      keep.diss = FALSE,
      keep.data = FALSE,
      pamonce = FALSE,
      trace.lev = 0
    )
  )
)

# Run precise_descriptors() on the 2D plot, supplying the PAM cluster labels
# column-bound with the original variables as the descriptors.
flow_case_soergel_descriptors <- precise_descriptors(
  flow_case_soergel_2d_plot,
  descriptors = cbind(flow_case_soergel_clusters, flow_case_data),
  rank = TRUE,
  size = 0.5,
  verbose = TRUE
)

# Pass the descriptor results to precise_trellis(), targeting a folder under
# the current working directory. file.path() builds the same path as the
# previous paste0(getwd(), "/...") without hand-written separators.
precise_trellis(
  flow_case_soergel_descriptors,
  path = file.path(getwd(), "trellis_flow_case_soergel_descriptors"),
  self_contained = TRUE
)

# Parallel backend for the feature step: 16 forked workers.
library(future)
library(doFuture)
options(future.globals.maxSize = Inf)
registerDoFuture()
plan(multicore, workers = 16)

# Run precise_features() with the PAM cluster labels as the outcome, using
# the "anova" method.
# Alternatives left disabled by the author: data = clinical_data,
# cv = 10, repeats = 1.
features2 <- precise_features(
  data = flow_case_data,
  outcome = flow_case_soergel_clusters,
  method = "anova",
  trees = 500,
  runs = 50,
  parallel = TRUE,
  verbose = TRUE
)